home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Shareware Grab Bag
/
Shareware Grab Bag.iso
/
007
/
grepsmc.arc
/
GREPSMC.C
< prev
next >
Wrap
Text File
|
1986-11-30
|
19KB
|
750 lines
/*
*
*
* The information in this document is subject to change
* without notice and should not be construed as a commitment
* by Digital Equipment Corporation or by DECUS.
*
* Neither Digital Equipment Corporation, DECUS, nor the authors
* assume any responsibility for the use or reliability of this
* document or the described software.
*
* Copyright (C) 1980, DECUS
*
*
* General permission to copy or modify, but not for profit, is
* hereby granted, provided that the above copyright notice is
* included and reference made to the fact that reproduction
* privileges were granted by DECUS.
*
*/
#define SMALLC /* comment out for unix */
#define NOLOWER /* comment out if lower case passed in command line */
#define GOODHELP /* comment out to disable -h option */
#ifdef SMALLC
#include <stdiocb.h> /* addresses, orgs to link to system routines */
#include <libasm.h> /* addresses, orgs to link to printf, etc.*/
#else
#include <stdio.h>
#endif
/*
* grep.
*
* Original version ran on the Decus compiler or on vms.
* Converted for BDS compiler (under CP/M-80), 20-Jan-83, by Chris Kern.
*
* Converted to IBM PC with CI-C86 C Compiler June 1983 by David N. Smith
*
* Converted to Small C Version 2.0 (under CP/M-80) by C. Bingham 860724.1
* Compiles under BSD 4.2 cc when SMALLC and NOLOWER not defined.
* Several corrections were made to error messages and pattern checking.
* In addition, the capability to distinguish upper and lower case letters
* was added. However, in CP/M lower case is translated to upper in the
* command line. Thus when NOLOWER is defined at compilation, grep assumes
* all letters are lower case unless escaped by '\', in which case they are
* always taken to be upper case. Similarly, in CP/M, one cannot have
* imbedded blanks or tabs in an argument on the command line. Thus when
* NOLOWER is defined, '_' and '`' are interpreted as blank and tab unless
* escaped by '\' ('\_' and '\`' ).
*
* See below for more information.
*
*/
#ifdef GOODHELP
/*
* Text printed for the -h flag: how to run grep and what the flags mean.
* Each line is a separate string; setdocs() gathers their addresses into
* docs[] and help() prints the entries in order.
*/
char *docs0 = "Grep searches a file for a given pattern. Execute by";
char *docs1 = " grep [flags] regular_expression file_list";
char *docs2 = " ";
char *docs3 = "Flags are single characters preceeded by '-':";
char *docs4 = " -c Only a count of matching lines is printed";
char *docs5 =
" -f Print file name for matching lines switch; see below";
char *docs6 = " -n Each line is preceeded by its line number";
char *docs7 = " -v Only print non-matching lines";
char *docs8 = " ";
char *docs9 = "The file_list is a list of files.";
char *docs10 = " ";
char *docs11 = "The file name is printed only if more than one file is named.";
char *docs12 =
"The -f flag reverses this action (print name if one file, not if more).";
char *docs13 = " ";
/* help() stops at the first entry whose first byte is zero. */
char *docs14 = "\0";
/* Table of the 15 lines above; the Small C build holds the addresses as int. */
#ifdef SMALLC
int docs[15];
#else
char *docs[15];
#endif
/*
* Second part of the -h text: the pattern syntax. The first two lines and
* the closing lines depend on NOLOWER, so the table has 32 entries when
* NOLOWER is defined and 30 otherwise. setpatdoc() fills patdoc[] and
* help() prints it.
*/
#ifdef NOLOWER
char *patdoc0 =
"The regular_expression defines the pattern to search for. Upper case";
char *patdoc1 =
"letters must be preceded by '\\'.";
#else
char *patdoc0 =
"The regular_expression defines the pattern to search for. Upper and";
char *patdoc1 =
"lower case are distinguished.";
#endif
char *patdoc2 = " ";
char *patdoc3 =
"x An ordinary character (not mentioned below) matches that character.";
char *patdoc4 =
"'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.";
char *patdoc5 =
"'$' A dollar-sign at the end of an expression matches the end of a line.";
char *patdoc6 =
"'^' A circumflex at the beginning of an expression matches the";
char *patdoc7 =
" beginning of a line. Thus '^$' matches an empty line.";
char *patdoc8 =
"'.' A period matches any character except \"newline\".";
char *patdoc9 =
"':a' A colon matches a class of characters described by the following";
char *patdoc10 =
"':d' character. \":a\" matches any alphabetic, \":d\" matches digits,";
char *patdoc11 =
"':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and";
char *patdoc12 =
"': ' other non-printing control characters.";
char *patdoc13 =
"'*' An expression followed by an asterisk matches zero or more";
char *patdoc14 =
" occurrances of that expression: \"fo*\" matches \"f\"; \"fo\" and";
char *patdoc15 =
" \"foo\"; \"t.*e\" matches \"te\", \"the\", \"table\", etc.";
char *patdoc16 =
"'+' An expression followed by a plus sign matches one or more";
char *patdoc17 =
" occurrances of that expression: \"fo+\" matches \"fo\", not \"f\".";
char *patdoc18 =
"'-' An expression followed by a minus sign matches 0 or 1 occurrances";
char *patdoc19 =
" of the expression. \"te-n\" matches \"tn\" and \"ten\", not \"teen\".";
char *patdoc20 =
"'[]' A string enclosed in square brackets matches any character in";
char *patdoc21 =
" that string, but no others. If the first character in the";
char *patdoc22 =
" string is a circumflex ('^'); the expression matches any character";
char *patdoc23 =
" except \"newline\" and the characters in the string. For";
char *patdoc24 =
" example, \"[xyz]\" matches \"x\", \"y\" or \"z\", while \"[^xyz]\"";
char *patdoc25 =
" matches \"a\" or \"b\" but not \"x\". A range of characters may be";
char *patdoc26 =
" specified by two characters separated by \"-\". Thus,";
char *patdoc27 =
" [a-z] matches any lower case letter, while [z-a] never matches.";
char *patdoc28 =
"The concatenation of regular expressions is a regular expression.";
/* The last entry of either variant starts with a zero byte: help() stops there. */
#ifdef NOLOWER
char *patdoc29 =
"A space must be coded by '_' and a tab by '`'. To obtain these characters";
char *patdoc30 =
"use '\\_' and '\\`'.";
char *patdoc31 = "\0";
#else
char *patdoc29 = "\0";
#endif
/* Table of the lines above; the Small C build holds the addresses as int. */
#ifdef SMALLC
#ifdef NOLOWER
int patdoc[32];
#else
int patdoc[30];
#endif
#else
#ifdef NOLOWER
char *patdoc[32];
#else
char *patdoc[30];
#endif
#endif
#endif
#define LMAX 512 /* Size of the line buffer lbuf[] */
#define PMAX 256 /* Size of the compiled pattern buffer pbuf[] */
#define ENDSTR '\n' /* Character pmatch() treats as the end of a line */
/*
* Op codes written into pbuf[] by compile()/cclass() and interpreted by
* pmatch().
*/
#define CHARR 1 /* Literal character; the character follows */
#define BOL 2 /* '^': match only at the start of lbuf */
#define EOL 3 /* '$': match only at ENDSTR */
#define ANY 4 /* '.': any character except ENDSTR */
#define CLASS 5 /* '[...]': a count byte and the members follow */
#define NCLASS 6 /* '[^...]': same layout as CLASS, sense inverted */
#define STAR 7 /* Zero or more of the element that follows */
#define PLUS 8 /* One or more of the element that follows */
#define MINUS 9 /* Zero or one of the element that follows */
#define ALPHA 10 /* ':a': a letter */
#define DIGIT 11 /* ':d': a digit '0'..'9' */
#define NALPHA 12 /* ':n': a letter or a digit */
#define PUNCT 13 /* ': ': a character <= ' ' other than ENDSTR */
#define RANGE 14 /* Inside a class: low and high characters follow */
#define ENDPAT 15 /* Ends the pattern and every repeated element */
int cflag; /* -c: print only a count of selected lines */
int fflag; /* Non-zero when the file name is printed (see main) */
int nflag; /* -n: print line numbers */
int vflag; /* -v: select the lines that do not match */
int nfile; /* Number of file name arguments */
int debug = 0; /* Set for debug code */
char *pp; /* Next free byte in pbuf[] while compiling */
char lbuf[LMAX]; /* Current input line */
char pbuf[PMAX]; /* Compiled pattern */
/*******************************************************/
/*
 * main -- parse the command line, compile the pattern and scan the inputs.
 *
 * Every argument that begins with '-' is read as a run of single-letter
 * flags.  The first argument that is not a flag is the pattern; the
 * remaining ones are file names.  Arguments that have been dealt with are
 * zeroed in argv[] so that the second pass sees only file names.  When no
 * file name is left, stdin is scanned.
 *
 * Returns 0 when every named file could be opened and 1 otherwise, so the
 * caller gets a defined status instead of whatever falling off the end of
 * main leaves behind.  Usage and pattern errors do not come back here:
 * usage() and error() call exit(1).
 */
main(argc, argv)
int argc;
#ifdef SMALLC
int argv[];                     /* no *char arrays in small c*/
#else
char *argv[];
#endif
{
    char *p;
    int c, i;
    int gotpattern;
    int status;                 /* Value handed back to the caller */
    FILE *f;

    cflag = fflag = nflag = vflag = debug = 0;
    status = 0;
#ifdef GOODHELP
    setdocs();
    setpatdoc();
#endif
    if (argc <= 1)
        usage("No arguments");
    nfile = argc-1;             /* Flags and the pattern are subtracted below */
    gotpattern = 0;
    for (i=1; i < argc; ++i) {
        p = argv[i];
        if (*p == '-') {
            ++p;
            while ((c = *p++)) {
                switch(tolower(c)) {
#ifdef GOODHELP
                case 'h':
                    help(docs);
                    help(patdoc);
                    break;
#endif
                case 'c':
                    ++cflag;
                    break;
                case 'd':
                    ++debug;
                    break;
                case 'f':
                    ++fflag;
                    break;
                case 'n':
                    ++nflag;
                    break;
                case 'v':
                    ++vflag;
                    break;
                default:
                    usage("Unknown flag");
                }
            }
            argv[i] = 0;        /* Not a file name */
            --nfile;
        } else if (!gotpattern) {
            compile(p);
            argv[i] = 0;        /* Not a file name */
            ++gotpattern;
            --nfile;
        }
    }
    if (!gotpattern)
        usage("No pattern");
    if (nfile == 0)
        grep(stdin, "stdin");
    else {
        /* Names are printed for several files; -f inverts that choice. */
        fflag = fflag ^ (nfile > 1);
        for (i=1; i < argc; ++i) {
            if ((p = argv[i])) {
                if ((f=fopen(p, "r")) == NULL) {
                    cant(p);
                    status = 1; /* Remember the failure, keep going */
                }
                else {
                    grep(f, p);
                    fclose(f);
                }
            }
        }
    }
    return(status);
}
/*******************************************************/
/*
 * cant -- report on stderr that the file named s could not be opened.
 */
cant(s)
char *s;                        /* Name of the file */
{
    fputs("grep: cannot open ", stderr);
    fputs(s, stderr);
    fputs("\n", stderr);
}
/*******************************************************/
#ifdef GOODHELP
/*
 * help -- write a table of text lines to stderr, one entry per line.
 *
 * hp  table of string addresses; printing stops at the first entry whose
 *     first byte is zero.
 */
help(hp)
#ifdef SMALLC
int hp[];
#else
char *hp[];
#endif
{
    int idx;
    char *text;                 /* Entry about to be printed */

    idx = 0;
    text = hp[0];
    while (*text) {
        fprintf(stderr,"%s\n",hp[idx]);
        ++idx;
        text = hp[idx];
    }
}
#endif
/*******************************************************/
/*
 * usage -- print the complaint s and a summary of the command line on
 * stderr, then leave the program with exit(1).
 */
usage(s)
char *s;                        /* What was wrong with the command line */
{
    fputs("grep: ", stderr);
    fputs(s, stderr);
    fputs("\n", stderr);
#ifdef GOODHELP
    fputs("Usage: grep [-cfnvh] pattern [ file1 [ file2 ... ] ] [ > file ]\n",
        stderr);
    fputs(" or: grep [-cfnvh] pattern < file1 [ > file ]\n", stderr);
    fputs("To get help, use grep -h\n", stderr);
#else
    fputs("Usage: grep [-cfnv] pattern [ file1 [ file2 ... ] ] [ > file ]\n",
        stderr);
    fputs(" or: grep [-cfnv] pattern < file1 [ > file ]\n", stderr);
#endif
    exit(1);
}
/*******************************************************/
compile(source)
char *source; /* Pattern to compile */
/*
* Compile the pattern into global pbuf[]
*
* Each element of the pattern is stored as an op code, followed by the
* character itself for CHARR and by a count byte and the members for
* CLASS/NCLASS (see cclass()). A '*', '+' or '-' applies to the element
* stored just before it: that element is moved up one byte, the
* STAR/PLUS/MINUS code is written in front of it and an ENDPAT is left
* behind it. The finished pattern ends with ENDPAT and a zero byte.
*
* Malformed patterns are reported through badpat(), which exits the
* program. When the debug flag is set, the pattern text and the compiled
* bytes are written to stderr.
*/
{
char *s; /* Source string pointer */
char *lp; /* Start of the element stored last */
int c; /* Current character */
int o; /* Op code of the element stored last */
char *spp; /* Pattern end, saved while the element is shifted */
#ifdef SMALLC
int cclass(); /* Compile class routine */
#else
char *cclass();
#endif
s = source;
if (debug)
fprintf(stderr,"Pattern = \"%s\"\n", s);
pp = pbuf;
while ((c = *s++)) {
/*
* STAR, PLUS and MINUS are special.
*/
if (c == '*' || c == '+' || c == '-') {
/*
* There must be a previous element, and it may not be an anchor or
* another repeat. lp is only read when pp != pbuf, that is, after at
* least one element has set it.
*/
if (pp == pbuf ||
(o = *lp) == BOL ||
o == EOL ||
o == STAR ||
o == PLUS ||
o == MINUS
)
badpat("Illegal occurrance op.", source, s);
/*
* Grow the pattern by two bytes: one becomes the slot for the op code
* in front of the element, the other stays as the ENDPAT behind it.
*/
store(ENDPAT);
store(ENDPAT);
spp = pp; /* Save pattern end */
while (--pp > lp) /* Move pattern down */
*pp = *(pp-1); /* one byte */
/* pp == lp here: put the repeat op code in front of the element. */
if(c == '*') *pp = STAR;
else
{
if(c == '-') *pp = MINUS;
else *pp = PLUS;
}
pp = spp; /* Restore pattern end */
continue;
}
/*
* All the rest.
*/
lp = pp; /* Remember start */
switch(c) {
case '^':
store(BOL);
break;
case '$':
store(EOL);
break;
case '.':
store(ANY);
break;
case '[':
/* cclass() stores the class and returns the source position after it. */
s = cclass(source, s);
break;
case ':':
/* ':' takes one type letter; the letter is compared without regard to case. */
if (*s) {
c = *s++;
switch(tolower(c)) {
case 'a':
store(ALPHA);
break;
case 'd':
store(DIGIT);
break;
case 'n':
store(NALPHA);
break;
case ' ':
store(PUNCT);
break;
default:
badpat("Unknown : type", source, s);
}
break;
}
/* badpat() exits, so control does not continue into the next case. */
else badpat("No : type", source, s);
case '\\':
/*
* Quote the next character. A '\' at the very end of the pattern
* has nothing to quote and is stored as itself.
*/
if (*s)
c = *s++;
#ifdef NOLOWER
/* Stored exactly as given: no case folding, no '_' or '`' translation. */
store(CHARR);
store(c);
break;
#endif
/* Without NOLOWER the quoted character is stored by the default case. */
default:
#ifdef NOLOWER
/* Unquoted: '_' stands for a blank, '`' for a tab, letters are folded. */
switch(c)
{
case '_':
c = ' ';
break;
case '`':
c = '\t';
break;
default:
c = tolower(c);
}
#endif
store(CHARR);
store(c);
}
}
store(ENDPAT);
store(0); /* Terminate string */
if (debug) {
/* Dump the compiled bytes: values below ' ' in hex, the rest as characters. */
for (lp = pbuf; lp < pp;) {
if ((c = (*lp++ & 255)) < ' ')
fprintf(stderr,"%2xh ", c);
else
fprintf(stderr,"%c ", c);
}
putc('\n',stderr);
}
}
/*******************************************************/
/*char * (not legal for Small C) */
#ifdef SMALLC
cclass(source, src)
#else
char *cclass(source, src)
#endif
char *source; /* Pattern start -- for error msg. */
char *src; /* Class start */
/*
* Compile a class (within [])
*
* src points just after the '['. The class is appended to the pattern
* as CLASS (or NCLASS when the class begins with '^'), a count byte and
* the members. A range "a-z" is stored as RANGE, low character, high
* character. The count byte holds the number of bytes from itself to
* the end of the class.
*
* Returns the source position just after the closing ']'. Errors are
* reported through badpat(), which exits the program.
*/
{
char *s; /* Source pointer */
char *cp; /* Address of the count byte */
int c; /* Current character */
int o; /* Op code: CLASS or NCLASS */
s = src;
o = CLASS;
if (*s == '^') {
++s;
o = NCLASS;
}
store(o);
cp = pp;
store(0); /* Byte count */
while ((c = *s++))
{
/* A ']' ends the class only once a member has been stored; as the */
/* first member it is taken literally. */
if (c == ']' && pp - cp > 1)
break;
if (c == '\\') { /* Store quoted char */
if ((c = *s++) == '\0') /* Gotta get something */
badpat("Class terminates badly", source, --s);
else store(c);
}
/* A '-' is a range only with a member before it and a character other */
/* than ']' or the end of the pattern after it; otherwise it is literal. */
else if ((c == '-') &&
((pp - cp) > 1) && (*s != ']') && (*s != '\0') )
{
/* Turn the member stored last into "RANGE, low" and add the high end. */
c = *(pp-1); /* Range start */
*(pp-1) = RANGE; /* Range signal */
store(c); /* Re-store start */
c = *s++; /* Get end char */
#ifdef NOLOWER
/* Range end: quoted as given, else '_' and '`' translated, letters folded. */
switch(c)
{
case '\\':
c = *s++;
if(c == '\0')
badpat("Class terminates badly", source, --s);
break;
case '_':
c = ' ';
break;
case '`':
c = '\t';
break;
default:
c = tolower(c);
}
#else
if(c == '\\') /* Store quoted character */
{
if((c = *s++) == '\0')
badpat("Class terminates badly", source, --s);
}
#endif
store(c); /* Store it */
}
else
{
#ifdef NOLOWER
/* Ordinary member: '_' is a blank, '`' is a tab, letters are folded. */
switch (c)
{
case '_':
c = ' ';
break;
case '`':
c = '\t';
break;
default:
c = tolower(c);
}
#endif
store(c); /* Store normal char */
}
}
/* The loop ends on ']' or on the end of the pattern; only ']' is legal. */
if (c != ']')
badpat("Unterminated class", source, --s);
/* The count has to fit in the single count byte. */
if ((c = (pp - cp)) >= 256)
badpat("Class too large", source, s);
*cp = c;
return(s);
}
/*******************************************************/
/*
 * store -- append one byte to the compiled pattern at pp.
 *
 * Calls error() with "Pattern too complex" when pbuf[] has no room left.
 */
store(op)
char op;                        /* Byte to append */
{
    if (pp - pbuf >= PMAX)
        error("Pattern too complex\n");
    *pp = op;
    ++pp;
}
/*******************************************************/
/*
 * badpat -- report a malformed pattern on stderr and give up.
 *
 * Prints the message, the whole pattern, and the position and character
 * at which compilation stopped, then calls error(), which exits.
 *
 * stop must point past at least one character of source, because the
 * character in front of it is the one printed.
 */
badpat(message, source, stop)
char *message;                  /* Error message */
char *source;                   /* Pattern start */
char *stop;                     /* Pattern end */
{
    int pos;                    /* Offset of stop within the pattern */

    /* %d wants an int; a pointer difference need not be one. */
    pos = stop - source;
    fprintf(stderr,"grep: %s, pattern is \"%s\"\n",message,source);
    /* This line already closes its quote and ends with a newline. */
    fprintf(stderr," Stopped at byte %d \'%c\'\n",
        pos,*(stop-1));
    error("grep: Bad pattern\n");
}
/*******************************************************/
/*
 * grep -- scan one open file for the pattern in pbuf[].
 *
 * A line is selected when it matches, or, with vflag set, when it does
 * not.  Without cflag each selected line is printed, preceded by the file
 * name when fflag is set and by the line number when nflag is set.  With
 * cflag only the number of selected lines is printed at the end.
 */
grep(fp, fn)
FILE *fp;                       /* File to process */
char *fn;                       /* File name (for -f option) */
{
    int lineno;                 /* Number of the line in lbuf */
    int hits;                   /* Selected lines so far */
    int found;                  /* Result of match() */

    hits = 0;
    for (lineno = 1; fgets(lbuf, LMAX, fp); ++lineno) {
        found = match();
        if (!found == !vflag)
            continue;           /* Not selected */
        ++hits;
        if (cflag)
            continue;           /* Counting only */
        if (fflag)
            printf("%s:",fn);
        if (nflag)
            printf("%d:", lineno);
        printf("%s", lbuf);
    }
    if (cflag) {
        if (fflag)
            printf("%s: ",fn);
        printf("%d\n", hits);
    }
}
/*******************************************************/
/*
 * match -- try the compiled pattern at every position of the line in
 * lbuf[].  Returns 1 as soon as one position matches, 0 if none does.
 */
match()
{
    char *start;                /* Position being tried */
#ifdef SMALLC
    int pmatch();
#else
    char *pmatch();
#endif

    start = lbuf;
    while (*start) {
        if (pmatch(start, pbuf))
            return(1);
        ++start;
    }
    return(0);
}
/*******************************************************/
/*
 * patskip -- step over the element that follows a STAR, PLUS or MINUS.
 *
 * pattern points at the op code of the repeated element.  compile() only
 * ever puts a single element there, closed by ENDPAT.  The length is
 * taken from the op code instead of searching for a byte equal to ENDPAT,
 * because a class count or a literal operand may have that same value.
 *
 * Returns the address of the byte after the closing ENDPAT.
 */
#ifdef SMALLC
patskip(pattern)
#else
char *patskip(pattern)
#endif
char *pattern;                  /* Op code of the repeated element */
{
    char *p;
    int op;

    p = pattern;
    op = *p++;
    if (op == CHARR)
        ++p;                    /* The literal character */
    else if (op == CLASS || op == NCLASS)
        p += *p & 255;          /* Count byte counts itself and the members */
    return(p + 1);              /* Step over the ENDPAT */
}

/*
 * pmatch -- match a compiled (partial) pattern against a (partial) line.
 *
 * line     where in lbuf[] the match has to start
 * pattern  compiled pattern, ending in ENDPAT
 *
 * Returns the position in the line just after the matched text, or 0 when
 * the pattern does not match at this position.  STAR and PLUS take the
 * longest run first and back off one character at a time until the rest
 * of the pattern matches.
 *
 * The terminating zero byte of the line never matches a character op, so
 * the scan cannot leave the text that fgets() stored, even when the line
 * has no trailing newline.
 *
 * NOTE(review): a negated class accepts ENDSTR itself, although the -h
 * text says it does not; left as it was.
 */
#ifdef SMALLC
pmatch(line, pattern)
#else
char *pmatch(line, pattern)
#endif
char *line;                     /* (partial) line to match */
char *pattern;                  /* (partial) pattern to match */
{
    char *l;                    /* Current line pointer */
    char *p;                    /* Current pattern pointer */
    char c;                     /* Current character */
    char *e;                    /* End for STAR and PLUS match */
    int op;                     /* Pattern operation */
    int n;                      /* Class counter */
    int off;                    /* Offset in line, for the debug trace */
    char *are;                  /* Start of STAR match */

    l = line;
    if (debug > 1)
        fprintf(stderr,"pmatch(\"%s\")\n", line);
    p = pattern;
    while ((op = *p++) != ENDPAT) {
        if (debug > 1) {
            off = l - line;     /* %d wants an int */
            fprintf(stderr,"byte[%d] = %xh, '%c',op = %xh\n",
                off, *l, *l, op);
        }
        switch(op) {
        case CHARR:
            /* The operand is never zero, so this also stops at the end. */
            if (*l != *p++)
                return(0);
            l++;
            break;
        case BOL:
            if (l != lbuf)
                return(0);
            break;
        case EOL:
            if (*l != ENDSTR)
                return(0);
            break;
        case ANY:
            if (*l == ENDSTR || *l == '\0')
                return(0);
            l++;
            break;
        case DIGIT:
            if ((c = *l++) < '0' || (c > '9'))
                return(0);
            break;
        case NALPHA:
        case ALPHA:
            c = *l++;
            if (c >= 'a' && c <= 'z')
                break;
            else if (c >= 'A' && c <= 'Z')
                break;
            else if(op == NALPHA && c >= '0' && c <= '9')
                break;
            return(0);
        case PUNCT:
            c = *l++;
            if (c == ENDSTR || c == '\0' || c > ' ')
                return(0);
            break;
        case CLASS:
        case NCLASS:
            c = *l++;
            if (c == '\0')      /* End of the text: nothing to test */
                return(0);
            n = *p++ & 255;
            /*
             * Walk the members.  Leaving through a break means c was
             * found, with n - 2 member bytes not yet looked at; running
             * out (n <= 1) means it was not found.
             */
            do {
                if (*p == RANGE) {
                    p += 3;
                    n -= 2;
                    if (c >= *(p-2) && c <= *(p-1))
                        break;
                }
                else if (c == *p++)
                    break;
            } while (--n > 1);
            if ((op == CLASS) == (n <= 1))
                return(0);
            if (op == CLASS)
                p += n - 2;     /* Skip the members not looked at */
            break;
        case MINUS:
            e = pmatch(l, p);   /* Look for a match */
            p = patskip(p);     /* Skip over pattern */
            if (e)              /* Got a match? */
                l = e;          /* Yes, update string */
            break;              /* Always succeeds */
        case PLUS:              /* One or more ... */
            if ((l = pmatch(l, p)) == 0)
                return(0);      /* Gotta have a match */
            /* One taken; the rest is handled like STAR. */
        case STAR:              /* Zero or more ... */
            are = l;            /* Remember line start */
            while ((*l != ENDSTR) && ((e = pmatch(l, p))))
                l = e;          /* Get longest match */
            p = patskip(p);     /* Skip over pattern */
            while (1) {         /* Try to match rest */
                if ((e = pmatch(l, p)))
                    return(e);
                if (l == are)   /* Backed off all the way */
                    break;
                --l;            /* Nope, try earlier */
            }
            return(0);          /* Nothing else worked */
        default:
            fprintf(stderr,"Bad op code %d\n", op);
            error("Cannot happen -- match\n");
        }
    }
    return(l);
}
/*******************************************************/
/*
 * error -- write the message s to stderr and leave the program with
 * exit(1).  The message is written as given; callers supply the newline.
 */
error(s)
char *s;                        /* Message to print */
{
    fprintf(stderr, "%s", s);
    exit(1);
}
/*
 * tolower -- return the lower case form of an upper case letter 'A'..'Z';
 * every other character is returned unchanged.
 */
tolower(c)
char c;
{
    if (c < 'A' || c > 'Z')
        return c;
    return c + ('a' - 'A');
}
#ifdef GOODHELP
/*
 * setdocs -- put the addresses of the usage text lines docs0..docs14 into
 * the docs[] table, in order.
 */
setdocs()
{
    docs[0] = docs0;
    docs[1] = docs1;
    docs[2] = docs2;
    docs[3] = docs3;
    docs[4] = docs4;
    docs[5] = docs5;
    docs[6] = docs6;
    docs[7] = docs7;
    docs[8] = docs8;
    docs[9] = docs9;
    docs[10] = docs10;
    docs[11] = docs11;
    docs[12] = docs12;
    docs[13] = docs13;
    docs[14] = docs14;
}
/*
 * setpatdoc -- put the addresses of the pattern help lines into the
 * patdoc[] table, in order.  Entries 30 and 31 exist only when NOLOWER is
 * defined.
 */
setpatdoc()
{
    patdoc[0] = patdoc0;
    patdoc[1] = patdoc1;
    patdoc[2] = patdoc2;
    patdoc[3] = patdoc3;
    patdoc[4] = patdoc4;
    patdoc[5] = patdoc5;
    patdoc[6] = patdoc6;
    patdoc[7] = patdoc7;
    patdoc[8] = patdoc8;
    patdoc[9] = patdoc9;
    patdoc[10] = patdoc10;
    patdoc[11] = patdoc11;
    patdoc[12] = patdoc12;
    patdoc[13] = patdoc13;
    patdoc[14] = patdoc14;
    patdoc[15] = patdoc15;
    patdoc[16] = patdoc16;
    patdoc[17] = patdoc17;
    patdoc[18] = patdoc18;
    patdoc[19] = patdoc19;
    patdoc[20] = patdoc20;
    patdoc[21] = patdoc21;
    patdoc[22] = patdoc22;
    patdoc[23] = patdoc23;
    patdoc[24] = patdoc24;
    patdoc[25] = patdoc25;
    patdoc[26] = patdoc26;
    patdoc[27] = patdoc27;
    patdoc[28] = patdoc28;
    patdoc[29] = patdoc29;
#ifdef NOLOWER
    patdoc[30] = patdoc30;
    patdoc[31] = patdoc31;
#endif
}
#endif